import re
import requests
import csv

# Fetch the page at `url`, extract each movie's title, year, rating and
# number of raters with a regular expression, and write one CSV row per movie.
url = 'https://movie.douban.com/top250'
headers = {
    # Browser-like User-Agent sent with the request.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36"
}
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(url, headers=headers, timeout=10)
try:
    page_content = res.text
finally:
    # The body is fully read above, so the connection can be released now.
    res.close()

# Parse the data: one match per <li> entry, with named groups
# name / year / score / people. re.S lets `.` span line breaks.
obj = re.compile(r'<li>.*?<span class="title">(?P<name>.*?)</span>.*?<p class="">.*?<br>(?P<year>.*?)&nbsp.*?'
                 r'<span class="rating_num" property="v:average">(?P<score>.*?)</span>.*?'
                 r'<span>(?P<people>.*?)人评价</span>', re.S)
# Start matching (lazy iterator over all matches in the page).
result = obj.finditer(page_content)

# Store the results as CSV. The `with` block guarantees the file is flushed
# and closed even if a row fails; newline='' is what the csv module expects,
# otherwise extra blank lines appear on platforms that translate '\n'.
with open(r'/reptile/data/data.csv', 'w', encoding='utf-8', newline='') as f:
    csvwriter = csv.writer(f)

    for it in result:
        dic = it.groupdict()
        # The captured year is surrounded by whitespace in the HTML.
        dic['year'] = dic['year'].strip()
        # Column order follows the group order: name, year, score, people.
        csvwriter.writerow(dic.values())

print('over!')

